sn-ENS neuron integration

test markers for CRE so that subtypes could be manipulated like specific activation or blockage

try entropy score H, also calculate Q|t for each subtype

source("/Shared_win/projects/RNA_normal/analysis.10x.r")

load integrated obj

GEX.seur <- readRDS("../integration_Nb5d/sn10x_WYS.sct_anno.s.rds")
GEX.seur
## An object of class Seurat 
## 47356 features across 19418 samples within 3 assays 
## Active assay: SCT (20434 features, 0 variable features)
##  2 other assays present: RNA, integrated
##  3 dimensional reductions calculated: pca, tsne, umap
ref.seur <- readRDS("../../20230704_10x_SZJ/analysis_ref/GSE149524.P21.integration_Anno.s.rds")
ref.seur
## An object of class Seurat 
## 37583 features across 4419 samples within 3 assays 
## Active assay: SCT (16365 features, 0 variable features)
##  2 other assays present: RNA, integrated
##  3 dimensional reductions calculated: pca, tsne, umap
# define intAnno1/2 colors
color.A1 <- c("#678BB1","#8AB6CE","#3975C1","#669FDF","#4CC1BD",
              "#BF7E6B","#D46B35","#F19258","#FF8080",
              "#BDAE8D","#BD66C4","#C03778",
              "#97BA59","#DFAB16","#2BA956","#9FE727")

color.A2 <- c("#678BB1","#8AB6CE","#3975C1","#669FDF","#4CC1BD",
              "#BF7E6B","#D46B35","#F19258","#FF8080",
              "#BDAE8D","#BD66C4","#C03778",
              "#97BA59","#C4D116", "#DFAB16","#EDE25A", "#2BA956","#9FE727")


# define batch/condition colors
color.cnt <- scales::hue_pal()(4)[c(2,1,4,3)]

color.test1 <- color.cnt[1:2]
color.test2 <- color.cnt[3:4]

## define feature colors
# Cell2020     
material.heat = function(n){
  colorRampPalette(
    c(
      "#283593", # indigo 800
      "#3F51B5", # indigo
      "#2196F3", # blue
      "#00BCD4", # cyan
      "#4CAF50", # green
      "#8BC34A", # light green
      "#CDDC39", # lime
      "#FFEB3B", # yellow
      "#FFC107", # amber
      "#FF9800", # orange
      "#FF5722"  # deep orange
    )
  )(n)
}

# Immunity2019, na gray
colors.Immunity <-c("#191970","#121285","#0C0C9A","#0707B0","#0101C5","#0014CF","#0033D3","#0053D8","#0072DD","#0092E1","#00B2E6",
                  "#00D1EB","#23E8CD","#7AF17B","#D2FA29","#FFEB00","#FFC300","#FF9B00","#FF8400","#FF7800","#FF6B00","#FF5F00","#FF5300",
                  "#FF4700","#F73B00","#EF2E00","#E62300","#DD1700","#D50B00","#CD0000")


# NatNeur2021, sc-neurons
color.ref <- c("#8AB6CE","#678BB1","#3975C1","#4CC1BD",
               "#C03778","#97BA59","#DFAB16","#BF7E6B",
               "#D46B35","#BDAE8D","#BD66C4","#2BA956",
               "#FF8080","#FF8080","#FF8080","#FF0000")
cowplot::plot_grid(
DimPlot(GEX.seur, reduction = "umap", group.by = "intAnno1", label = T, label.size = 3.25,repel = F, pt.size = 0.05,
        cols = color.A1),
  DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "cnt",
        cols = color.cnt) ,
rel_widths = c(4.8,5),
ncol = 2)

cowplot::plot_grid(
DimPlot(GEX.seur, reduction = "umap", group.by = "intAnno2", label = T, label.size = 2.65,repel = F, pt.size = 0.05,
        cols = color.A2),
  DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "cnt",
        cols = color.cnt) ,
rel_widths = c(4.85,5),
ncol = 2)

PBS_INF

test1.seur <- subset(GEX.seur, subset= cnt %in% c("Nb5d.PBS","Nb5d.INF"))
test1.seur
## An object of class Seurat 
## 47356 features across 8232 samples within 3 assays 
## Active assay: SCT (20434 features, 0 variable features)
##  2 other assays present: RNA, integrated
##  3 dimensional reductions calculated: pca, tsne, umap
cowplot::plot_grid(
DimPlot(test1.seur, reduction = "umap", group.by = "intAnno1", label = T, label.size = 3.25,repel = F, pt.size = 0.15,
        cols = color.A1),
  DimPlot(test1.seur, label = F, pt.size = 0.15, repel = F, reduction = 'umap', group.by = "cnt",
        cols = color.test1) ,
rel_widths = c(4.8,5),
ncol = 2)

cowplot::plot_grid(
DimPlot(test1.seur, reduction = "umap", group.by = "intAnno2", label = T, label.size = 2.65,repel = F, pt.size = 0.15,
        cols = color.A2),
  DimPlot(test1.seur, label = F, pt.size = 0.15, repel = F, reduction = 'umap', group.by = "cnt",
        cols = color.test1) ,
rel_widths = c(4.85,5),
ncol = 2)

PBS only

PBS.seur <- subset(GEX.seur, subset= cnt %in% c("Nb5d.PBS"))
PBS.seur
## An object of class Seurat 
## 47356 features across 4789 samples within 3 assays 
## Active assay: SCT (20434 features, 0 variable features)
##  2 other assays present: RNA, integrated
##  3 dimensional reductions calculated: pca, tsne, umap
cowplot::plot_grid(
DimPlot(PBS.seur, reduction = "umap", group.by = "intAnno1", label = T, label.size = 3.25,repel = F, pt.size = 0.15,
        cols = color.A1),
  DimPlot(PBS.seur, label = F, pt.size = 0.15, repel = F, reduction = 'umap', group.by = "cnt",
        cols = color.test1) ,
rel_widths = c(4.8,5),
ncol = 2)

cowplot::plot_grid(
DimPlot(PBS.seur, reduction = "umap", group.by = "intAnno2", label = T, label.size = 2.65,repel = F, pt.size = 0.15,
        cols = color.A2),
  DimPlot(PBS.seur, label = F, pt.size = 0.15, repel = F, reduction = 'umap', group.by = "cnt",
        cols = color.test1) ,
rel_widths = c(4.85,5),
ncol = 2)

CTL_CKO

#test2.seur <- subset(GEX.seur, subset= cnt %in% c("Nb5d.CTL","Nb5d.CKO"))
#test2.seur

markers

directly using markers in SCT_intAnno1
markers.new <- read.csv("../integration_Nb5d/Baf53cre_Nb.markers.SCT_intAnno1.202402.csv")
markers.new$cluster <- factor(as.character(markers.new$cluster),
                              levels = levels(GEX.seur$intAnno1))
head(markers.new)
##   p_val avg_log2FC pct.1 pct.2 p_val_adj cluster   gene
## 1     0   1.824018 0.953 0.335         0    EMN1  Ptprt
## 2     0   1.668551 0.981 0.495         0    EMN1  Tshz2
## 3     0   1.585436 0.938 0.319         0    EMN1   Bnc2
## 4     0   1.384775 0.909 0.397         0    EMN1  Grik1
## 5     0   1.301007 1.000 0.793         0    EMN1 Rbfox1
## 6     0   1.107801 0.992 0.864         0    EMN1  Negr1

filtering

cut.pct = 0.1
cut.padj = 0.001

markers.n <- (markers.new %>% filter(pct.1 > cut.pct & p_val_adj < cut.padj))$gene
markers.n <- unique(markers.n)

#head(markers.n)
length(markers.n)
## [1] 3109
DIG <- grep("^mt-|^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^Jun|^Fos|^AY|^AA|^AC|^AW|^BC|^Gm|^Hist|Lars2|Rik$|-ps",
            markers.n,value = T)
markers.n <- setdiff(markers.n, DIG)
length(markers.n)
## [1] 2679

calculation

avg.matrix

mat.n <- AverageExpression(GEX.seur,
                           assays = "SCT",
                           features = markers.n,
                           return.seurat = F,
                           group.by = "intAnno1",
                           layer="data")
## The following functions and any applicable methods accept the dots: CreateSeuratObject
# AverageExpression() would recover the log2-data, here add log2 back
mat.n <- log2(mat.n$SCT+1)
mat.n <- as.data.frame(mat.n)
dim(mat.n)
## [1] 2679   16
mat.n[1:5,]
##            EMN1     EMN2      EMN3      EMN4      EMN5      IMN1      IMN2
## Ptprt  2.655444 1.922185 0.6064187 0.1626202 0.2422009 0.1698824 0.1398116
## Tshz2  2.921009 2.455140 0.7958795 0.2954559 0.6207125 1.4444638 0.4100883
## Bnc2   2.440412 2.029988 1.1821320 1.5221357 2.1438358 0.1392112 0.1139562
## Grik1  2.632152 2.932916 1.7605224 1.3623424 0.6407503 0.4778998 0.5302975
## Rbfox1 4.464304 4.436291 3.4913102 2.5482965 1.2826152 1.0258514 1.3658601
##              IMN3       IMN4       IN1       IN2      IN3     IPAN1     IPAN2
## Ptprt  0.12504700 0.07246964 1.8134133 1.2097636 1.518071 0.9160788 0.8096295
## Tshz2  0.32598631 0.56145575 0.4183887 0.3957756 2.671051 0.7400137 0.8958165
## Bnc2   0.06803752 0.24100810 0.1766197 0.1069152 1.764711 0.1519077 0.8054275
## Grik1  0.38719387 0.41503750 0.3286227 0.1796715 0.971711 0.2279666 0.2410081
## Rbfox1 2.21907111 2.54357087 1.3383857 2.8662705 4.931885 4.4772219 2.3510499
##            IPAN3     IPAN4
## Ptprt  0.2529807 0.2324798
## Tshz2  0.6322682 0.7312857
## Bnc2   0.2326608 0.3030989
## Grik1  0.3504972 0.6286980
## Rbfox1 2.7970130 3.3886686

entropy

## shanno.entropy
# ref. https://davetang.org/muse/2013/08/28/tissue-specificity/
#
# Q version for specific tissue
#    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1088961/
#     'Measuring tissue specificity with entropy: Qg|t'
shannon.entropy <- function(p,returnH=TRUE,returnQ=FALSE){
  if (min(p) < 0 || sum(p) <= 0 || is.na(p[1])) return(NA)
  else {p.norm <- p[p>0]/sum(p)
  
  if(returnH==TRUE){
    return(-sum(log2(p.norm)*p.norm))
  }else if(returnQ==TRUE){
    return(-sum(log2(p.norm)*p.norm) - log2(p/sum(p)))
  }else{
    return(NA)
  }
  } 
}

H

ShE <- rep(NA,nrow(mat.n))

for(i in 1:nrow(mat.n)){
  ShE[i] <- shannon.entropy(as.vector(as.matrix(mat.n[i,])))
}
mat.n$ShE.H <- ShE
dim(mat.n)
## [1] 2679   17
mat.n[1:5,]
##            EMN1     EMN2      EMN3      EMN4      EMN5      IMN1      IMN2
## Ptprt  2.655444 1.922185 0.6064187 0.1626202 0.2422009 0.1698824 0.1398116
## Tshz2  2.921009 2.455140 0.7958795 0.2954559 0.6207125 1.4444638 0.4100883
## Bnc2   2.440412 2.029988 1.1821320 1.5221357 2.1438358 0.1392112 0.1139562
## Grik1  2.632152 2.932916 1.7605224 1.3623424 0.6407503 0.4778998 0.5302975
## Rbfox1 4.464304 4.436291 3.4913102 2.5482965 1.2826152 1.0258514 1.3658601
##              IMN3       IMN4       IN1       IN2      IN3     IPAN1     IPAN2
## Ptprt  0.12504700 0.07246964 1.8134133 1.2097636 1.518071 0.9160788 0.8096295
## Tshz2  0.32598631 0.56145575 0.4183887 0.3957756 2.671051 0.7400137 0.8958165
## Bnc2   0.06803752 0.24100810 0.1766197 0.1069152 1.764711 0.1519077 0.8054275
## Grik1  0.38719387 0.41503750 0.3286227 0.1796715 0.971711 0.2279666 0.2410081
## Rbfox1 2.21907111 2.54357087 1.3383857 2.8662705 4.931885 4.4772219 2.3510499
##            IPAN3     IPAN4    ShE.H
## Ptprt  0.2529807 0.2324798 3.359455
## Tshz2  0.6322682 0.7312857 3.583557
## Bnc2   0.2326608 0.3030989 3.280647
## Grik1  0.3504972 0.6286980 3.468434
## Rbfox1 2.7970130 3.3886686 3.862025
#write.csv(mat.n, "test_CREmarkers/matrix.avgExp_ShE.H.csv")
plot(sort(mat.n$ShE.H), 
     main= "",
     ylab = "Entropy Score",
     xlab = "Sorted Filtered Markers")
abline(h=1)
abline(h=1.5)
abline(h=2)
abline(h=2.5)
abline(h=3)
abline(h=3.5)

Q|t

# 
ShE.Q <- matrix(data = NA,
                nrow = length(markers.n),
                ncol = length(levels(markers.new$cluster)))

rownames(ShE.Q) <- markers.n
colnames(ShE.Q) <- levels(markers.new$cluster)

ShE.Q[1:5,1:5]
##        EMN1 EMN2 EMN3 EMN4 EMN5
## Ptprt    NA   NA   NA   NA   NA
## Tshz2    NA   NA   NA   NA   NA
## Bnc2     NA   NA   NA   NA   NA
## Grik1    NA   NA   NA   NA   NA
## Rbfox1   NA   NA   NA   NA   NA
for(ii in 1:nrow(ShE.Q)){
  ShE.Q[ii,] <- shannon.entropy(as.vector(as.matrix(mat.n[ii,1:16])),
                               returnH = FALSE, returnQ = TRUE) 
}
ShE.Q <- as.data.frame(ShE.Q)

for(nn in 1:ncol(ShE.Q)){
  ShE.Q[,nn][is.infinite(ShE.Q[,nn])] <- 12
}
sum(is.na(ShE.Q))
## [1] 0
sum(is.infinite(ShE.Q$EMN3))
## [1] 0
ShE.Q[1:5,1:5]
##            EMN1     EMN2     EMN3     EMN4     EMN5
## Ptprt  5.634029 6.100235 7.764596 9.663404 9.088706
## Tshz2  6.065198 6.315860 7.941043 9.370651 8.299668
## Bnc2   5.740055 6.005709 6.785789 6.421083 5.926986
## Grik1  5.886464 5.730371 6.466703 6.836617 7.924873
## Rbfox1 7.212261 7.221343 7.566928 8.021163 9.011608
ShE.Q %>% arrange(EMN1) %>% head(8)
##                EMN1     EMN2     EMN3     EMN4     EMN5     IMN1      IMN2
## Dsc3       3.764107 4.915328 8.749315 8.880889 7.554829 8.159380  8.420940
## Dock2      4.061433 4.298919 8.236083 8.946709 5.261573 9.774764 10.477792
## Caln1      4.155643 5.198258 8.979109 9.734998 7.155134 8.717892  8.528550
## Tmem163    4.203885 3.786503 7.747804 7.263834 7.865115 8.721184  9.312186
## Pi15       4.390661 5.298218 8.973564 8.692491 7.781209 8.677110  9.090279
## Tmem132cos 4.395585 6.244501 9.549886 9.268456 8.187378 9.596800  9.483146
## Insyn2b    4.521230 4.610104 8.926992 8.814650 5.447574 9.643589 10.347058
## Tmem132c   4.591526 6.362202 9.608157 8.446722 8.762290 9.528402  9.583124
##                 IMN3     IMN4      IN1       IN2      IN3    IPAN1    IPAN2
## Dsc3        8.963182 8.055549 8.755171  7.573888 6.714365 8.114485 7.755140
## Dock2      11.757571 8.593396 8.972257  8.372613 7.197920 9.200830 7.933046
## Caln1       8.916975 9.061784 8.808725  8.841002 4.334564 8.527540 9.309343
## Tmem163     8.859407 8.272979 9.233480 10.040841 7.600900 8.659436 8.781755
## Pi15        8.194369 8.919823 7.809301  6.299029 7.667099 8.795553 8.236121
## Tmem132cos 10.760865 9.588758 8.562376  8.565222 4.727123 9.125146 6.488263
## Insyn2b     9.723543 9.134953 8.689117  9.326240 7.061131 8.917851 8.101906
## Tmem132c    9.174982 8.877341 8.322807  9.067038 5.300405 8.633030 6.702962
##                IPAN3    IPAN4
## Dsc3       12.000000 8.003080
## Dock2      12.000000 7.569130
## Caln1      10.180289 8.576720
## Tmem163     7.093112 8.893623
## Pi15        9.457682 7.748585
## Tmem132cos  6.754561 8.110178
## Insyn2b     8.265891 7.452693
## Tmem132c    6.577545 8.358975
ShE.Q %>% arrange(IPAN1) %>% head(8)
##           EMN1      EMN2      EMN3      EMN4      EMN5     IMN1     IMN2
## Btg4  8.492209 12.000000 12.000000 12.000000 12.000000 8.021425 7.142348
## Itgb6 7.933657  8.291203  7.294544  9.327260  9.406877 8.354383 8.282223
## Dapk2 7.928384  8.544685  8.321872  8.845453  7.927003 8.263939 8.383973
## Layn  7.610403  6.814437  8.226534 12.000000  7.609021 7.246876 8.066515
## Kit   7.380907  7.098536  6.225324 12.000000  7.478922 7.116777 6.616599
## Otof  7.624147  8.147214  7.149723  6.548392  7.943731 7.582136 8.079476
## Smad6 8.796843  9.998357  8.998357  8.715843 12.000000 8.673941 8.668697
## Oc90  8.632864  6.668358  8.834504  7.552831 12.000000 7.969786 9.089238
##           IMN3      IMN4       IN1       IN2       IN3    IPAN1    IPAN2
## Btg4  7.102310  6.512263 12.000000  5.705997 12.000000 1.068297 5.763363
## Itgb6 7.508182  9.234286  8.617323 12.000000 12.000000 1.224553 5.243536
## Dapk2 8.343975  7.754658  6.726031  6.629982  6.084370 1.581388 7.781798
## Layn  8.026477  7.436431  6.819467 12.000000  5.761401 1.740145 6.949955
## Kit   6.897826 12.000000 12.000000 12.000000 12.000000 1.954451 4.979762
## Otof  7.363591  7.188595  8.152244  7.962941 12.000000 2.007532 7.606490
## Smad6 9.212917 12.000000  9.588638  8.814697  4.653266 2.119263 6.296300
## Oc90  7.051353  7.459858 12.000000  7.651685 12.000000 2.185270 6.127824
##           IPAN3     IPAN4
## Btg4  12.000000 12.000000
## Itgb6  7.778662  7.542432
## Dapk2 12.000000  8.378327
## Layn  12.000000 12.000000
## Kit   12.000000 12.000000
## Otof   7.313583  8.395054
## Smad6  5.594141  8.248716
## Oc90   6.004234  7.085704
ShE.Q %>% arrange(EMN5) %>% head(8)
##             EMN1     EMN2      EMN3     EMN4     EMN5      IMN1      IMN2
## Nfatc1  9.786706 9.766938  7.036251 4.006556 1.637709  8.400180  7.441072
## Egfros  7.023554 6.591335  9.581713 3.945756 2.161482 11.300388  8.837435
## Egfr    7.128813 5.996650  9.037511 4.288033 2.317219 10.170273  9.706171
## Fut9   11.254590 9.017041  8.022038 5.325862 2.348509 10.784072 12.073257
## Ntn1    8.567217 8.018644  8.716150 5.462094 2.671150  7.000452 10.968364
## Kl     10.046038 8.442420  7.122169 4.482818 2.858706  7.991823  8.696177
## Lmo7    9.712149 9.692376 10.691113 7.411904 3.048897  9.019270  9.946268
## Kcnip1  8.446853 7.158625  6.896833 3.589648 3.171838 10.604803  8.727645
##             IMN3      IMN4       IN1       IN2       IN3     IPAN1     IPAN2
## Nfatc1  9.566040  6.982510  9.941761  9.167820 12.000000  8.851137  8.810835
## Egfros  7.799586  6.478359  8.758493 12.000000 12.000000  9.987253 10.624615
## Egfr    6.941407  6.675475 12.000000  8.271019 12.000000  8.689391 10.493963
## Fut9   12.000000  7.291223  9.412990  9.637611 12.000000 11.055035  9.281160
## Ntn1    6.770621  7.025734  8.308096  8.532718 12.000000  7.954415  7.593738
## Kl      8.656139 12.000000  9.031860  6.676766 12.000000  9.261736 12.000000
## Lmo7   12.000000 12.000000  4.981694  7.510622  6.641859  6.611811  5.769740
## Kcnip1  8.011760 12.000000  8.386489  9.609204  6.749267  8.808044  9.252219
##            IPAN3     IPAN4
## Nfatc1 12.000000 12.000000
## Egfros 12.000000  9.416041
## Egfr   12.000000 12.000000
## Fut9    4.914329  5.385352
## Ntn1    7.883360  7.647129
## Kl      5.616417  8.690525
## Lmo7   12.000000  7.359184
## Kcnip1 12.000000 10.041810
VlnPlot(test1.seur, features = c("Dsc3","Dock2","Caln1","Tmem163"), group.by = "intAnno1", assay = "RNA", ncol = 2, cols = color.A1) 

VlnPlot(test1.seur, features = c("Btg4","Itgb6","Dapk2","Layn"), group.by = "intAnno1", assay = "RNA", ncol = 2, cols = color.A1) 

par(mfrow=c(4,4))
lapply(colnames(ShE.Q),function(x){
  
  plot(sort(ShE.Q[,x]), 
     main= x,
     ylab = "Entropy Score - Q|t",
     xlab = "Sorted Filtered Markers")
  abline(h=4)
  abline(h=5)
  abline(h=6)
  abline(h=7)
  abline(h=8)
})

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
## 
## [[12]]
## NULL
## 
## [[13]]
## NULL
## 
## [[14]]
## NULL
## 
## [[15]]
## NULL
## 
## [[16]]
## NULL
markers.filt1 <- markers.new %>% filter(gene %in% markers.n)

# add avg.expression
markers.filt1$avg.exp <- "NA"
for(ii in 1:nrow(markers.filt1)){
  markers.filt1$avg.exp[ii] <- round(mat.n[markers.filt1$gene[ii],markers.filt1$cluster[ii]],6)
}


# add ShE.H
markers.filt1$ShE.H <- "NA"
for(ii in 1:nrow(markers.filt1)){
  markers.filt1$ShE.H[ii] <- round(mat.n[markers.filt1$gene[ii],"ShE.H"],6)
}


# add ShE.Q
markers.filt1$ShE.Q <- "NA"
for(ii in 1:nrow(markers.filt1)){
  markers.filt1$ShE.Q[ii] <- round(ShE.Q[markers.filt1$gene[ii],markers.filt1$cluster[ii]],6)
}

markers.filt1 <- markers.filt1 %>% arrange(cluster, ShE.Q)
dim(markers.new)
## [1] 10134     7
dim(markers.filt1)
## [1] 8098   10
head(markers.new)
##   p_val avg_log2FC pct.1 pct.2 p_val_adj cluster   gene
## 1     0   1.824018 0.953 0.335         0    EMN1  Ptprt
## 2     0   1.668551 0.981 0.495         0    EMN1  Tshz2
## 3     0   1.585436 0.938 0.319         0    EMN1   Bnc2
## 4     0   1.384775 0.909 0.397         0    EMN1  Grik1
## 5     0   1.301007 1.000 0.793         0    EMN1 Rbfox1
## 6     0   1.107801 0.992 0.864         0    EMN1  Negr1
head(markers.filt1)
##           p_val avg_log2FC pct.1 pct.2     p_val_adj cluster       gene
## 1  0.000000e+00  0.3833830 0.256 0.027  0.000000e+00    EMN1       Dsc3
## 2  0.000000e+00  0.5050384 0.338 0.055  0.000000e+00    EMN1      Dock2
## 3  0.000000e+00  0.6942665 0.431 0.049  0.000000e+00    EMN1      Caln1
## 4 3.401898e-257  0.3335389 0.237 0.057 6.951438e-253    EMN1    Tmem163
## 5  0.000000e+00  0.3519255 0.251 0.032  0.000000e+00    EMN1       Pi15
## 6  0.000000e+00  0.4348302 0.278 0.026  0.000000e+00    EMN1 Tmem132cos
##    avg.exp    ShE.H    ShE.Q
## 1 0.424829 2.662889 3.764107
## 2 0.601381 2.558888 4.061433
## 3 0.779451 2.605756 4.155643
## 4 0.436328 2.527474 4.203885
## 5 0.401343 2.994079 4.390661
## 6 0.477657 2.831334 4.395585
#write.csv(ShE.Q, "test_CREmarkers/matrix.ShE.Q.csv")
#write.csv(markers.filt1, "test_CREmarkers/markers.filt2679.Qsorted.csv")

set cutoff

table(markers.new$cluster)
## 
##  EMN1  EMN2  EMN3  EMN4  EMN5  IMN1  IMN2  IMN3  IMN4   IN1   IN2   IN3 IPAN1 
##   356   663   329   610   777   377   530   943   884   556   801   372  1148 
## IPAN2 IPAN3 IPAN4 
##   622   501   665
table(markers.filt1$cluster)
## 
##  EMN1  EMN2  EMN3  EMN4  EMN5  IMN1  IMN2  IMN3  IMN4   IN1   IN2   IN3 IPAN1 
##   321   589   285   465   568   323   449   733   685   462   582   278   950 
## IPAN2 IPAN3 IPAN4 
##   505   378   525
markers.filt1 %>% filter(ShE.Q < 6) %>%  group_by(cluster)  %>% summarise(n=n())
## # A tibble: 16 x 2
##    cluster     n
##    <fct>   <int>
##  1 EMN1       48
##  2 EMN2       42
##  3 EMN3       30
##  4 EMN4       46
##  5 EMN5       79
##  6 IMN1       38
##  7 IMN2       39
##  8 IMN3       75
##  9 IMN4       91
## 10 IN1        88
## 11 IN2        84
## 12 IN3        69
## 13 IPAN1     176
## 14 IPAN2      96
## 15 IPAN3      87
## 16 IPAN4      91
markers.filt1 %>% filter(ShE.H < 3) %>%  group_by(cluster)  %>% summarise(n=n())
## # A tibble: 16 x 2
##    cluster     n
##    <fct>   <int>
##  1 EMN1       17
##  2 EMN2       13
##  3 EMN3       10
##  4 EMN4       23
##  5 EMN5       37
##  6 IMN1       13
##  7 IMN2       16
##  8 IMN3       23
##  9 IMN4       40
## 10 IN1        43
## 11 IN2        43
## 12 IN3        33
## 13 IPAN1      76
## 14 IPAN2      54
## 15 IPAN3      48
## 16 IPAN4      52
markers.filt1 %>% filter(ShE.H < 3 & ShE.Q < 6) %>%  group_by(cluster)  %>% summarise(n=n())
## # A tibble: 16 x 2
##    cluster     n
##    <fct>   <int>
##  1 EMN1       16
##  2 EMN2       12
##  3 EMN3        9
##  4 EMN4       22
##  5 EMN5       34
##  6 IMN1       12
##  7 IMN2       13
##  8 IMN3       20
##  9 IMN4       34
## 10 IN1        41
## 11 IN2        40
## 12 IN3        31
## 13 IPAN1      72
## 14 IPAN2      53
## 15 IPAN3      46
## 16 IPAN4      49
#markers.filt2 <- markers.filt1 %>% filter(ShE.H < 3) %>% group_by(cluster) %>% arrange(cluster,ShE.Q)
#markers.filt2
markers.filt2 <- markers.filt1 %>% group_by(cluster) %>% arrange(cluster,ShE.Q) %>% do(head(.,n=60))
markers.filt2 %>% head(8)
## # A tibble: 8 x 10
## # Groups:   cluster [1]
##       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene    avg.exp ShE.H ShE.Q
##       <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>   <chr>   <chr> <chr>
## 1 0              0.383 0.256 0.027 0         EMN1    Dsc3    0.4248~ 2.66~ 3.76~
## 2 0              0.505 0.338 0.055 0         EMN1    Dock2   0.6013~ 2.55~ 4.06~
## 3 0              0.694 0.431 0.049 0         EMN1    Caln1   0.7794~ 2.60~ 4.15~
## 4 3.40e-257      0.334 0.237 0.057 6.95e-253 EMN1    Tmem163 0.4363~ 2.52~ 4.20~
## 5 0              0.352 0.251 0.032 0         EMN1    Pi15    0.4013~ 2.99~ 4.39~
## 6 0              0.435 0.278 0.026 0         EMN1    Tmem13~ 0.4776~ 2.83~ 4.39~
## 7 2.12e-276      0.263 0.204 0.036 4.33e-272 EMN1    Insyn2b 0.3196~ 2.80~ 4.52~
## 8 0              0.683 0.431 0.046 0         EMN1    Tmem13~ 0.7571~ 3.02~ 4.59~
pp.filt2 <- lapply(levels(GEX.seur$intAnno1), function(xx){
  
  DotPlot(GEX.seur, features = (markers.filt2 %>% filter(cluster==xx))$gene, group.by = "intAnno1",
                         cols = c("midnightblue","darkorange1"))  + labs(title=paste0(xx," - candidate CRE genes(entropy Q|t top40)") )+
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev)
  
})
#pp.filt2
markers.filt3 <- markers.filt1 %>% filter(pct.1>0.2) %>% group_by(cluster) %>% arrange(cluster,ShE.Q) %>% do(head(.,n=60))
markers.filt3 %>% head(8)
## # A tibble: 8 x 10
## # Groups:   cluster [1]
##       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene    avg.exp ShE.H ShE.Q
##       <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>   <chr>   <chr> <chr>
## 1 0              0.383 0.256 0.027 0         EMN1    Dsc3    0.4248~ 2.66~ 3.76~
## 2 0              0.505 0.338 0.055 0         EMN1    Dock2   0.6013~ 2.55~ 4.06~
## 3 0              0.694 0.431 0.049 0         EMN1    Caln1   0.7794~ 2.60~ 4.15~
## 4 3.40e-257      0.334 0.237 0.057 6.95e-253 EMN1    Tmem163 0.4363~ 2.52~ 4.20~
## 5 0              0.352 0.251 0.032 0         EMN1    Pi15    0.4013~ 2.99~ 4.39~
## 6 0              0.435 0.278 0.026 0         EMN1    Tmem13~ 0.4776~ 2.83~ 4.39~
## 7 2.12e-276      0.263 0.204 0.036 4.33e-272 EMN1    Insyn2b 0.3196~ 2.80~ 4.52~
## 8 0              0.683 0.431 0.046 0         EMN1    Tmem13~ 0.7571~ 3.02~ 4.59~
#write.csv(markers.filt3, "test_CREmarkers/markers.pct0.2_Qtop60.csv")

plot

pp.filt3 <- lapply(levels(GEX.seur$intAnno1), function(xx){
  
  DotPlot(GEX.seur, features = (markers.filt3 %>% filter(cluster==xx))$gene, group.by = "intAnno1",
                         cols = c("midnightblue","darkorange1"))  + labs(title=paste0(xx," - candidate CRE genes(pct>20%, entropy Q|t top60)") )+
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev)+ 
  scale_color_gradientn(colours  = material.heat(100))
  
})
pp.filt3
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

ppref.filt3 <- lapply(levels(GEX.seur$intAnno1), function(xx){
  
  DotPlot(ref.seur, features = (markers.filt3 %>% filter(cluster==xx))$gene, group.by = "Anno2",
                         cols = c("midnightblue","darkorange1"))  + labs(title=paste0(xx," - candidate CRE genes(check in NatNeur2021)") )+
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev)+ 
  scale_color_gradientn(colours  = material.heat(100))
  
})
ppref.filt3
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

xx=16
  cowplot::plot_grid(
    pp.filt3[[xx]],
    ppref.filt3[[xx]],
    ncol = 1
  )